In [ ]:
import pandas as pd
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import  GridSearchCV,KFold, cross_validate
from sklearn.linear_model import ElasticNetCV
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cufflinks as cf
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import warnings
from sklearn.exceptions import ConvergenceWarning

# Put plotly and cufflinks into offline mode so figures render inside the
# notebook.
init_notebook_mode(connected=True)
cf.go_offline()

# The previous `ConvergenceWarning('ignore')` merely constructed a warning
# object and discarded it, so it never silenced anything. Register an actual
# filter for that category instead.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Blanket suppression is kept so the notebook's recorded output is unchanged.
# NOTE(review): this also hides deprecation/runtime warnings; consider removing
# it and relying on the targeted filter above.
warnings.filterwarnings("ignore")
In [ ]:
import os
from pathlib import Path

# Folder that holds train.csv and unique_m.csv. The default is the original
# author's local path; set the SUPERCONDUCT_DATA_DIR environment variable to
# run the notebook elsewhere without editing this cell.
DATA_DIR = Path(os.environ.get(
    'SUPERCONDUCT_DATA_DIR',
    r'/Users/allen/Desktop/MSDS/QTW/qtw_smu/Case Study 1: Linear Regression/superconduct',
))

# Both files are read with pandas defaults (first row as header, fresh RangeIndex).
df_train = pd.read_csv(DATA_DIR / 'train.csv')
df_unique_m = pd.read_csv(DATA_DIR / 'unique_m.csv')
In [ ]:
# Join the two frames side by side on their row index. Columns present in both
# inputs come back suffixed '_x' (from df_train) and '_y' (from df_unique_m).
df_merge = df_train.merge(df_unique_m, left_index=True, right_index=True)
In [ ]:
# Drop the second copy of the target and the 'material' column.
# errors='ignore' makes the cell safe to re-run: on a second execution the
# columns are already gone, and the previous call raised KeyError.
# NOTE(review): this also stays silent if a column is missing on the first run.
df_merge = df_merge.drop(columns=['critical_temp_y', 'material'], errors='ignore')

# Give the surviving target column its un-suffixed name back. rename() returns
# a new frame and leaves things unchanged when 'critical_temp_x' is absent.
df_merge = df_merge.rename(columns={'critical_temp_x': 'critical_temp'})
In [ ]:
# Preview the first five rows of the merged frame.
df_merge.head(n=5)
Out[ ]:
number_of_elements mean_atomic_mass wtd_mean_atomic_mass gmean_atomic_mass wtd_gmean_atomic_mass entropy_atomic_mass wtd_entropy_atomic_mass range_atomic_mass wtd_range_atomic_mass std_atomic_mass ... Ir Pt Au Hg Tl Pb Bi Po At Rn
0 4 88.944468 57.862692 66.361592 36.116612 1.181795 1.062396 122.90607 31.794921 51.968828 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0
1 5 92.729214 58.518416 73.132787 36.396602 1.449309 1.057755 122.90607 36.161939 47.094633 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0
2 4 88.944468 57.885242 66.361592 36.122509 1.181795 0.975980 122.90607 35.741099 51.968828 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0
3 4 88.944468 57.873967 66.361592 36.119560 1.181795 1.022291 122.90607 33.768010 51.968828 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0
4 4 88.944468 57.840143 66.361592 36.110716 1.181795 1.129224 122.90607 27.848743 51.968828 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0

5 rows × 168 columns

In [ ]:
# Remove pandas' column display cap so wide frames render every column.
pd.options.display.max_columns = None
In [ ]:
# Preview the first five rows of the training features.
df_train.head(n=5)
Out[ ]:
number_of_elements mean_atomic_mass wtd_mean_atomic_mass gmean_atomic_mass wtd_gmean_atomic_mass entropy_atomic_mass wtd_entropy_atomic_mass range_atomic_mass wtd_range_atomic_mass std_atomic_mass wtd_std_atomic_mass mean_fie wtd_mean_fie gmean_fie wtd_gmean_fie entropy_fie wtd_entropy_fie range_fie wtd_range_fie std_fie wtd_std_fie mean_atomic_radius wtd_mean_atomic_radius gmean_atomic_radius wtd_gmean_atomic_radius entropy_atomic_radius wtd_entropy_atomic_radius range_atomic_radius wtd_range_atomic_radius std_atomic_radius wtd_std_atomic_radius mean_Density wtd_mean_Density gmean_Density wtd_gmean_Density entropy_Density wtd_entropy_Density range_Density wtd_range_Density std_Density wtd_std_Density mean_ElectronAffinity wtd_mean_ElectronAffinity gmean_ElectronAffinity wtd_gmean_ElectronAffinity entropy_ElectronAffinity wtd_entropy_ElectronAffinity range_ElectronAffinity wtd_range_ElectronAffinity std_ElectronAffinity wtd_std_ElectronAffinity mean_FusionHeat wtd_mean_FusionHeat gmean_FusionHeat wtd_gmean_FusionHeat entropy_FusionHeat wtd_entropy_FusionHeat range_FusionHeat wtd_range_FusionHeat std_FusionHeat wtd_std_FusionHeat mean_ThermalConductivity wtd_mean_ThermalConductivity gmean_ThermalConductivity wtd_gmean_ThermalConductivity entropy_ThermalConductivity wtd_entropy_ThermalConductivity range_ThermalConductivity wtd_range_ThermalConductivity std_ThermalConductivity wtd_std_ThermalConductivity mean_Valence wtd_mean_Valence gmean_Valence wtd_gmean_Valence entropy_Valence wtd_entropy_Valence range_Valence wtd_range_Valence std_Valence wtd_std_Valence critical_temp
0 4 88.944468 57.862692 66.361592 36.116612 1.181795 1.062396 122.90607 31.794921 51.968828 53.622535 775.425 1010.268571 718.152900 938.016780 1.305967 0.791488 810.6 735.985714 323.811808 355.562967 160.25 105.514286 136.126003 84.528423 1.259244 1.207040 205 42.914286 75.237540 69.235569 4654.35725 2961.502286 724.953211 53.543811 1.033129 0.814598 8958.571 1579.583429 3306.162897 3572.596624 81.8375 111.727143 60.123179 99.414682 1.159687 0.787382 127.05 80.987143 51.433712 42.558396 6.9055 3.846857 3.479475 1.040986 1.088575 0.994998 12.878 1.744571 4.599064 4.666920 107.756645 61.015189 7.062488 0.621979 0.308148 0.262848 399.97342 57.127669 168.854244 138.517163 2.25 2.257143 2.213364 2.219783 1.368922 1.066221 1 1.085714 0.433013 0.437059 29.0
1 5 92.729214 58.518416 73.132787 36.396602 1.449309 1.057755 122.90607 36.161939 47.094633 53.979870 766.440 1010.612857 720.605511 938.745413 1.544145 0.807078 810.6 743.164286 290.183029 354.963511 161.20 104.971429 141.465215 84.370167 1.508328 1.204115 205 50.571429 67.321319 68.008817 5821.48580 3021.016571 1237.095080 54.095718 1.314442 0.914802 10488.571 1667.383429 3767.403176 3632.649185 90.8900 112.316429 69.833315 101.166398 1.427997 0.838666 127.05 81.207857 49.438167 41.667621 7.7844 3.796857 4.403790 1.035251 1.374977 1.073094 12.878 1.595714 4.473363 4.603000 172.205316 61.372331 16.064228 0.619735 0.847404 0.567706 429.97342 51.413383 198.554600 139.630922 2.00 2.257143 1.888175 2.210679 1.557113 1.047221 2 1.128571 0.632456 0.468606 26.0
2 4 88.944468 57.885242 66.361592 36.122509 1.181795 0.975980 122.90607 35.741099 51.968828 53.656268 775.425 1010.820000 718.152900 939.009036 1.305967 0.773620 810.6 743.164286 323.811808 354.804183 160.25 104.685714 136.126003 84.214573 1.259244 1.132547 205 49.314286 75.237540 67.797712 4654.35725 2999.159429 724.953211 53.974022 1.033129 0.760305 8958.571 1667.383429 3306.162897 3592.019281 81.8375 112.213571 60.123179 101.082152 1.159687 0.786007 127.05 81.207857 51.433712 41.639878 6.9055 3.822571 3.479475 1.037439 1.088575 0.927479 12.878 1.757143 4.599064 4.649635 107.756645 60.943760 7.062488 0.619095 0.308148 0.250477 399.97342 57.127669 168.854244 138.540613 2.25 2.271429 2.213364 2.232679 1.368922 1.029175 1 1.114286 0.433013 0.444697 19.0
3 4 88.944468 57.873967 66.361592 36.119560 1.181795 1.022291 122.90607 33.768010 51.968828 53.639405 775.425 1010.544286 718.152900 938.512777 1.305967 0.783207 810.6 739.575000 323.811808 355.183884 160.25 105.100000 136.126003 84.371352 1.259244 1.173033 205 46.114286 75.237540 68.521665 4654.35725 2980.330857 724.953211 53.758486 1.033129 0.788889 8958.571 1623.483429 3306.162897 3582.370597 81.8375 111.970357 60.123179 100.244950 1.159687 0.786900 127.05 81.097500 51.433712 42.102344 6.9055 3.834714 3.479475 1.039211 1.088575 0.964031 12.878 1.744571 4.599064 4.658301 107.756645 60.979474 7.062488 0.620535 0.308148 0.257045 399.97342 57.127669 168.854244 138.528893 2.25 2.264286 2.213364 2.226222 1.368922 1.048834 1 1.100000 0.433013 0.440952 22.0
4 4 88.944468 57.840143 66.361592 36.110716 1.181795 1.129224 122.90607 27.848743 51.968828 53.588771 775.425 1009.717143 718.152900 937.025573 1.305967 0.805230 810.6 728.807143 323.811808 356.319281 160.25 106.342857 136.126003 84.843442 1.259244 1.261194 205 36.514286 75.237540 70.634448 4654.35725 2923.845143 724.953211 53.117029 1.033129 0.859811 8958.571 1491.783429 3306.162897 3552.668664 81.8375 111.240714 60.123179 97.774719 1.159687 0.787396 127.05 80.766429 51.433712 43.452059 6.9055 3.871143 3.479475 1.044545 1.088575 1.044970 12.878 1.744571 4.599064 4.684014 107.756645 61.086617 7.062488 0.624878 0.308148 0.272820 399.97342 57.127669 168.854244 138.493671 2.25 2.242857 2.213364 2.206963 1.368922 1.096052 1 1.057143 0.433013 0.428809 23.0
In [ ]:
# Preview the first five rows of the per-element table.
df_unique_m.head(n=5)
Out[ ]:
H He Li Be B C N O F Ne Na Mg Al Si P S Cl Ar K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe Cs Ba La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn critical_temp material
0 0.0 0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.20 1.80 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0 29.0 Ba0.2La1.8Cu1O4
1 0.0 0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.9 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.1 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.10 1.90 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0 26.0 Ba0.1La1.9Ag0.1Cu0.9O4
2 0.0 0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.10 1.90 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0 19.0 Ba0.1La1.9Cu1O4
3 0.0 0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.15 1.85 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0 22.0 Ba0.15La1.85Cu1O4
4 0.0 0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0.0 0.30 1.70 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0 23.0 Ba0.3La1.7Cu1O4
In [ ]:
# Capture each frame's (rows, columns) tuple under the same notebook-level
# names as before.
df_train_shape, df_unique_m_shape, df_merge_shape = (
    frame.shape for frame in (df_train, df_unique_m, df_merge)
)

# A single newline-separated print emits the same three report lines.
print(
    f'Train DataFrame Shape: {df_train_shape}',
    f'Unique_m DataFrame Shape:{df_unique_m_shape}',
    f'Merged DataFrame Shape:{df_merge_shape}',
    sep='\n',
)
Train DataFrame Shape: (21263, 82)
Unique_m DataFrame Shape:(21263, 88)
Merged DataFrame Shape:(21263, 168)
In [ ]:
# Full per-column listing (dtype and non-null count) for the training features.
df_train.info(verbose=True)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21263 entries, 0 to 21262
Data columns (total 82 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   number_of_elements               21263 non-null  int64  
 1   mean_atomic_mass                 21263 non-null  float64
 2   wtd_mean_atomic_mass             21263 non-null  float64
 3   gmean_atomic_mass                21263 non-null  float64
 4   wtd_gmean_atomic_mass            21263 non-null  float64
 5   entropy_atomic_mass              21263 non-null  float64
 6   wtd_entropy_atomic_mass          21263 non-null  float64
 7   range_atomic_mass                21263 non-null  float64
 8   wtd_range_atomic_mass            21263 non-null  float64
 9   std_atomic_mass                  21263 non-null  float64
 10  wtd_std_atomic_mass              21263 non-null  float64
 11  mean_fie                         21263 non-null  float64
 12  wtd_mean_fie                     21263 non-null  float64
 13  gmean_fie                        21263 non-null  float64
 14  wtd_gmean_fie                    21263 non-null  float64
 15  entropy_fie                      21263 non-null  float64
 16  wtd_entropy_fie                  21263 non-null  float64
 17  range_fie                        21263 non-null  float64
 18  wtd_range_fie                    21263 non-null  float64
 19  std_fie                          21263 non-null  float64
 20  wtd_std_fie                      21263 non-null  float64
 21  mean_atomic_radius               21263 non-null  float64
 22  wtd_mean_atomic_radius           21263 non-null  float64
 23  gmean_atomic_radius              21263 non-null  float64
 24  wtd_gmean_atomic_radius          21263 non-null  float64
 25  entropy_atomic_radius            21263 non-null  float64
 26  wtd_entropy_atomic_radius        21263 non-null  float64
 27  range_atomic_radius              21263 non-null  int64  
 28  wtd_range_atomic_radius          21263 non-null  float64
 29  std_atomic_radius                21263 non-null  float64
 30  wtd_std_atomic_radius            21263 non-null  float64
 31  mean_Density                     21263 non-null  float64
 32  wtd_mean_Density                 21263 non-null  float64
 33  gmean_Density                    21263 non-null  float64
 34  wtd_gmean_Density                21263 non-null  float64
 35  entropy_Density                  21263 non-null  float64
 36  wtd_entropy_Density              21263 non-null  float64
 37  range_Density                    21263 non-null  float64
 38  wtd_range_Density                21263 non-null  float64
 39  std_Density                      21263 non-null  float64
 40  wtd_std_Density                  21263 non-null  float64
 41  mean_ElectronAffinity            21263 non-null  float64
 42  wtd_mean_ElectronAffinity        21263 non-null  float64
 43  gmean_ElectronAffinity           21263 non-null  float64
 44  wtd_gmean_ElectronAffinity       21263 non-null  float64
 45  entropy_ElectronAffinity         21263 non-null  float64
 46  wtd_entropy_ElectronAffinity     21263 non-null  float64
 47  range_ElectronAffinity           21263 non-null  float64
 48  wtd_range_ElectronAffinity       21263 non-null  float64
 49  std_ElectronAffinity             21263 non-null  float64
 50  wtd_std_ElectronAffinity         21263 non-null  float64
 51  mean_FusionHeat                  21263 non-null  float64
 52  wtd_mean_FusionHeat              21263 non-null  float64
 53  gmean_FusionHeat                 21263 non-null  float64
 54  wtd_gmean_FusionHeat             21263 non-null  float64
 55  entropy_FusionHeat               21263 non-null  float64
 56  wtd_entropy_FusionHeat           21263 non-null  float64
 57  range_FusionHeat                 21263 non-null  float64
 58  wtd_range_FusionHeat             21263 non-null  float64
 59  std_FusionHeat                   21263 non-null  float64
 60  wtd_std_FusionHeat               21263 non-null  float64
 61  mean_ThermalConductivity         21263 non-null  float64
 62  wtd_mean_ThermalConductivity     21263 non-null  float64
 63  gmean_ThermalConductivity        21263 non-null  float64
 64  wtd_gmean_ThermalConductivity    21263 non-null  float64
 65  entropy_ThermalConductivity      21263 non-null  float64
 66  wtd_entropy_ThermalConductivity  21263 non-null  float64
 67  range_ThermalConductivity        21263 non-null  float64
 68  wtd_range_ThermalConductivity    21263 non-null  float64
 69  std_ThermalConductivity          21263 non-null  float64
 70  wtd_std_ThermalConductivity      21263 non-null  float64
 71  mean_Valence                     21263 non-null  float64
 72  wtd_mean_Valence                 21263 non-null  float64
 73  gmean_Valence                    21263 non-null  float64
 74  wtd_gmean_Valence                21263 non-null  float64
 75  entropy_Valence                  21263 non-null  float64
 76  wtd_entropy_Valence              21263 non-null  float64
 77  range_Valence                    21263 non-null  int64  
 78  wtd_range_Valence                21263 non-null  float64
 79  std_Valence                      21263 non-null  float64
 80  wtd_std_Valence                  21263 non-null  float64
 81  critical_temp                    21263 non-null  float64
dtypes: float64(79), int64(3)
memory usage: 13.3 MB
In [ ]:
# Full per-column listing (dtype and non-null count) for the per-element table.
df_unique_m.info(verbose=True)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21263 entries, 0 to 21262
Data columns (total 88 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   H              21263 non-null  float64
 1   He             21263 non-null  int64  
 2   Li             21263 non-null  float64
 3   Be             21263 non-null  float64
 4   B              21263 non-null  float64
 5   C              21263 non-null  float64
 6   N              21263 non-null  float64
 7   O              21263 non-null  float64
 8   F              21263 non-null  float64
 9   Ne             21263 non-null  int64  
 10  Na             21263 non-null  float64
 11  Mg             21263 non-null  float64
 12  Al             21263 non-null  float64
 13  Si             21263 non-null  float64
 14  P              21263 non-null  float64
 15  S              21263 non-null  float64
 16  Cl             21263 non-null  float64
 17  Ar             21263 non-null  int64  
 18  K              21263 non-null  float64
 19  Ca             21263 non-null  float64
 20  Sc             21263 non-null  float64
 21  Ti             21263 non-null  float64
 22  V              21263 non-null  float64
 23  Cr             21263 non-null  float64
 24  Mn             21263 non-null  float64
 25  Fe             21263 non-null  float64
 26  Co             21263 non-null  float64
 27  Ni             21263 non-null  float64
 28  Cu             21263 non-null  float64
 29  Zn             21263 non-null  float64
 30  Ga             21263 non-null  float64
 31  Ge             21263 non-null  float64
 32  As             21263 non-null  float64
 33  Se             21263 non-null  float64
 34  Br             21263 non-null  float64
 35  Kr             21263 non-null  int64  
 36  Rb             21263 non-null  float64
 37  Sr             21263 non-null  float64
 38  Y              21263 non-null  float64
 39  Zr             21263 non-null  float64
 40  Nb             21263 non-null  float64
 41  Mo             21263 non-null  float64
 42  Tc             21263 non-null  float64
 43  Ru             21263 non-null  float64
 44  Rh             21263 non-null  float64
 45  Pd             21263 non-null  float64
 46  Ag             21263 non-null  float64
 47  Cd             21263 non-null  float64
 48  In             21263 non-null  float64
 49  Sn             21263 non-null  float64
 50  Sb             21263 non-null  float64
 51  Te             21263 non-null  float64
 52  I              21263 non-null  float64
 53  Xe             21263 non-null  int64  
 54  Cs             21263 non-null  float64
 55  Ba             21263 non-null  float64
 56  La             21263 non-null  float64
 57  Ce             21263 non-null  float64
 58  Pr             21263 non-null  float64
 59  Nd             21263 non-null  float64
 60  Pm             21263 non-null  int64  
 61  Sm             21263 non-null  float64
 62  Eu             21263 non-null  float64
 63  Gd             21263 non-null  float64
 64  Tb             21263 non-null  float64
 65  Dy             21263 non-null  float64
 66  Ho             21263 non-null  float64
 67  Er             21263 non-null  float64
 68  Tm             21263 non-null  float64
 69  Yb             21263 non-null  float64
 70  Lu             21263 non-null  float64
 71  Hf             21263 non-null  float64
 72  Ta             21263 non-null  float64
 73  W              21263 non-null  float64
 74  Re             21263 non-null  float64
 75  Os             21263 non-null  float64
 76  Ir             21263 non-null  float64
 77  Pt             21263 non-null  float64
 78  Au             21263 non-null  float64
 79  Hg             21263 non-null  float64
 80  Tl             21263 non-null  float64
 81  Pb             21263 non-null  float64
 82  Bi             21263 non-null  float64
 83  Po             21263 non-null  int64  
 84  At             21263 non-null  int64  
 85  Rn             21263 non-null  int64  
 86  critical_temp  21263 non-null  float64
 87  material       21263 non-null  object 
dtypes: float64(78), int64(9), object(1)
memory usage: 14.3+ MB
In [ ]:
# Full per-column dtype listing for the merged frame; verbose=True forces every
# column to be listed rather than a truncated summary.
df_merge.info(verbose=True)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21263 entries, 0 to 21262
Data columns (total 168 columns):
 #    Column                           Dtype  
---   ------                           -----  
 0    number_of_elements               int64  
 1    mean_atomic_mass                 float64
 2    wtd_mean_atomic_mass             float64
 3    gmean_atomic_mass                float64
 4    wtd_gmean_atomic_mass            float64
 5    entropy_atomic_mass              float64
 6    wtd_entropy_atomic_mass          float64
 7    range_atomic_mass                float64
 8    wtd_range_atomic_mass            float64
 9    std_atomic_mass                  float64
 10   wtd_std_atomic_mass              float64
 11   mean_fie                         float64
 12   wtd_mean_fie                     float64
 13   gmean_fie                        float64
 14   wtd_gmean_fie                    float64
 15   entropy_fie                      float64
 16   wtd_entropy_fie                  float64
 17   range_fie                        float64
 18   wtd_range_fie                    float64
 19   std_fie                          float64
 20   wtd_std_fie                      float64
 21   mean_atomic_radius               float64
 22   wtd_mean_atomic_radius           float64
 23   gmean_atomic_radius              float64
 24   wtd_gmean_atomic_radius          float64
 25   entropy_atomic_radius            float64
 26   wtd_entropy_atomic_radius        float64
 27   range_atomic_radius              int64  
 28   wtd_range_atomic_radius          float64
 29   std_atomic_radius                float64
 30   wtd_std_atomic_radius            float64
 31   mean_Density                     float64
 32   wtd_mean_Density                 float64
 33   gmean_Density                    float64
 34   wtd_gmean_Density                float64
 35   entropy_Density                  float64
 36   wtd_entropy_Density              float64
 37   range_Density                    float64
 38   wtd_range_Density                float64
 39   std_Density                      float64
 40   wtd_std_Density                  float64
 41   mean_ElectronAffinity            float64
 42   wtd_mean_ElectronAffinity        float64
 43   gmean_ElectronAffinity           float64
 44   wtd_gmean_ElectronAffinity       float64
 45   entropy_ElectronAffinity         float64
 46   wtd_entropy_ElectronAffinity     float64
 47   range_ElectronAffinity           float64
 48   wtd_range_ElectronAffinity       float64
 49   std_ElectronAffinity             float64
 50   wtd_std_ElectronAffinity         float64
 51   mean_FusionHeat                  float64
 52   wtd_mean_FusionHeat              float64
 53   gmean_FusionHeat                 float64
 54   wtd_gmean_FusionHeat             float64
 55   entropy_FusionHeat               float64
 56   wtd_entropy_FusionHeat           float64
 57   range_FusionHeat                 float64
 58   wtd_range_FusionHeat             float64
 59   std_FusionHeat                   float64
 60   wtd_std_FusionHeat               float64
 61   mean_ThermalConductivity         float64
 62   wtd_mean_ThermalConductivity     float64
 63   gmean_ThermalConductivity        float64
 64   wtd_gmean_ThermalConductivity    float64
 65   entropy_ThermalConductivity      float64
 66   wtd_entropy_ThermalConductivity  float64
 67   range_ThermalConductivity        float64
 68   wtd_range_ThermalConductivity    float64
 69   std_ThermalConductivity          float64
 70   wtd_std_ThermalConductivity      float64
 71   mean_Valence                     float64
 72   wtd_mean_Valence                 float64
 73   gmean_Valence                    float64
 74   wtd_gmean_Valence                float64
 75   entropy_Valence                  float64
 76   wtd_entropy_Valence              float64
 77   range_Valence                    int64  
 78   wtd_range_Valence                float64
 79   std_Valence                      float64
 80   wtd_std_Valence                  float64
 81   critical_temp                    float64
 82   H                                float64
 83   He                               int64  
 84   Li                               float64
 85   Be                               float64
 86   B                                float64
 87   C                                float64
 88   N                                float64
 89   O                                float64
 90   F                                float64
 91   Ne                               int64  
 92   Na                               float64
 93   Mg                               float64
 94   Al                               float64
 95   Si                               float64
 96   P                                float64
 97   S                                float64
 98   Cl                               float64
 99   Ar                               int64  
 100  K                                float64
 101  Ca                               float64
 102  Sc                               float64
 103  Ti                               float64
 104  V                                float64
 105  Cr                               float64
 106  Mn                               float64
 107  Fe                               float64
 108  Co                               float64
 109  Ni                               float64
 110  Cu                               float64
 111  Zn                               float64
 112  Ga                               float64
 113  Ge                               float64
 114  As                               float64
 115  Se                               float64
 116  Br                               float64
 117  Kr                               int64  
 118  Rb                               float64
 119  Sr                               float64
 120  Y                                float64
 121  Zr                               float64
 122  Nb                               float64
 123  Mo                               float64
 124  Tc                               float64
 125  Ru                               float64
 126  Rh                               float64
 127  Pd                               float64
 128  Ag                               float64
 129  Cd                               float64
 130  In                               float64
 131  Sn                               float64
 132  Sb                               float64
 133  Te                               float64
 134  I                                float64
 135  Xe                               int64  
 136  Cs                               float64
 137  Ba                               float64
 138  La                               float64
 139  Ce                               float64
 140  Pr                               float64
 141  Nd                               float64
 142  Pm                               int64  
 143  Sm                               float64
 144  Eu                               float64
 145  Gd                               float64
 146  Tb                               float64
 147  Dy                               float64
 148  Ho                               float64
 149  Er                               float64
 150  Tm                               float64
 151  Yb                               float64
 152  Lu                               float64
 153  Hf                               float64
 154  Ta                               float64
 155  W                                float64
 156  Re                               float64
 157  Os                               float64
 158  Ir                               float64
 159  Pt                               float64
 160  Au                               float64
 161  Hg                               float64
 162  Tl                               float64
 163  Pb                               float64
 164  Bi                               float64
 165  Po                               int64  
 166  At                               int64  
 167  Rn                               int64  
dtypes: float64(156), int64(12)
memory usage: 27.3 MB
In [ ]:
# Summary statistics for every numeric column of the merged frame; the
# quartiles listed here are pandas' defaults, written out explicitly.
df_merge.describe(percentiles=[0.25, 0.5, 0.75])
Out[ ]:
number_of_elements mean_atomic_mass wtd_mean_atomic_mass gmean_atomic_mass wtd_gmean_atomic_mass entropy_atomic_mass wtd_entropy_atomic_mass range_atomic_mass wtd_range_atomic_mass std_atomic_mass wtd_std_atomic_mass mean_fie wtd_mean_fie gmean_fie wtd_gmean_fie entropy_fie wtd_entropy_fie range_fie wtd_range_fie std_fie wtd_std_fie mean_atomic_radius wtd_mean_atomic_radius gmean_atomic_radius wtd_gmean_atomic_radius entropy_atomic_radius wtd_entropy_atomic_radius range_atomic_radius wtd_range_atomic_radius std_atomic_radius wtd_std_atomic_radius mean_Density wtd_mean_Density gmean_Density wtd_gmean_Density entropy_Density wtd_entropy_Density range_Density wtd_range_Density std_Density wtd_std_Density mean_ElectronAffinity wtd_mean_ElectronAffinity gmean_ElectronAffinity wtd_gmean_ElectronAffinity entropy_ElectronAffinity wtd_entropy_ElectronAffinity range_ElectronAffinity wtd_range_ElectronAffinity std_ElectronAffinity wtd_std_ElectronAffinity mean_FusionHeat wtd_mean_FusionHeat gmean_FusionHeat wtd_gmean_FusionHeat entropy_FusionHeat wtd_entropy_FusionHeat range_FusionHeat wtd_range_FusionHeat std_FusionHeat wtd_std_FusionHeat mean_ThermalConductivity wtd_mean_ThermalConductivity gmean_ThermalConductivity wtd_gmean_ThermalConductivity entropy_ThermalConductivity wtd_entropy_ThermalConductivity range_ThermalConductivity wtd_range_ThermalConductivity std_ThermalConductivity wtd_std_ThermalConductivity mean_Valence wtd_mean_Valence gmean_Valence wtd_gmean_Valence entropy_Valence wtd_entropy_Valence range_Valence wtd_range_Valence std_Valence wtd_std_Valence critical_temp H He Li Be B C N O F Ne Na Mg Al Si P S Cl Ar K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe Cs Ba La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn
count 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.0 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.0 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.0 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.0 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.0 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.0 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 
21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.000000 21263.0 21263.0 21263.0
mean 4.115224 87.557631 72.988310 71.290627 58.539916 1.165608 1.063884 115.601251 33.225218 44.391893 41.448045 769.614748 870.442317 737.474751 832.769649 1.299172 0.926726 572.222612 483.517264 215.631279 224.050033 157.983101 134.720039 144.448738 120.989330 1.267756 1.131138 139.325025 51.369885 51.601267 52.340465 6111.465214 5267.188547 3460.692235 3117.241110 1.072425 0.856037 8665.438818 2902.736814 3416.910784 3319.170628 76.879751 92.717486 54.359502 72.416225 1.070250 0.770757 120.730514 59.332637 48.912207 44.409356 14.296113 13.848001 10.136977 10.141161 1.093343 0.914065 21.138994 8.218528 8.323333 7.717576 89.706911 81.549080 29.841727 27.308061 0.727630 0.539991 250.893443 62.033066 98.943993 96.234051 3.198228 3.153127 3.056536 3.055885 1.295682 1.052841 2.041010 1.483007 0.839342 0.673987 34.421219 0.017685 0.0 0.012125 0.034638 0.142594 0.384968 0.013284 3.009129 0.014874 0.0 0.008892 0.026772 0.061678 0.189889 0.028143 0.106246 0.009050 0.0 0.016042 0.258347 0.010919 0.156817 0.224782 0.006119 0.003191 0.153182 0.035323 0.090182 1.276751 0.014034 0.073997 0.082556 0.155197 0.078662 0.003940 0.0 0.007799 0.326909 0.177556 0.370901 0.442349 0.146367 0.002291 0.055325 0.068072 0.085034 0.007834 0.009152 0.049468 0.120994 0.101269 0.040491 0.004744 0.0 0.004129 0.568440 0.264953 0.030662 0.041494 0.039666 0.0 0.021992 0.017821 0.023959 0.002857 0.009536 0.008832 0.014217 0.008909 0.012716 0.026849 0.009168 0.036086 0.010424 0.038206 0.022512 0.061558 0.034108 0.020535 0.036663 0.047954 0.042461 0.201009 0.0 0.0 0.0
std 1.439295 29.676497 33.490406 31.030272 36.651067 0.364930 0.401423 54.626887 26.967752 20.035430 19.983544 87.488694 143.278200 78.327275 119.772520 0.381935 0.334018 309.614442 224.042874 109.966774 127.927104 20.147288 28.801567 22.090958 35.837843 0.375411 0.407159 67.272228 35.019356 22.898396 25.294524 2846.785185 3221.314506 3703.256370 3975.122587 0.342356 0.319761 4097.126831 2398.471020 1673.624915 1611.799629 27.701890 32.276387 29.007425 31.648444 0.343391 0.285986 58.700327 28.620409 21.740521 20.429293 11.300188 14.279335 10.065901 13.134007 0.375932 0.370125 20.370620 11.414066 8.671651 7.288239 38.517485 45.519256 34.059581 40.191150 0.325976 0.318248 158.703557 43.123317 60.143272 63.710355 1.044611 1.191249 1.046257 1.174815 0.393155 0.380291 1.242345 0.978176 0.484676 0.455580 34.254362 0.267220 0.0 0.129552 0.848541 1.044486 4.408032 0.150427 3.811649 0.132119 0.0 0.101685 0.271606 1.126254 2.217277 0.466710 0.760821 0.119717 0.0 0.138187 0.902732 0.185651 2.728139 3.407763 0.254272 0.129449 0.713075 0.580672 0.982521 2.079375 0.403316 1.115005 1.021279 1.076049 0.676294 0.083907 0.0 0.121254 0.763625 0.429953 4.846459 4.848246 2.084302 0.064728 0.770327 1.005898 1.554018 0.167831 0.688729 0.521820 1.886951 1.839020 0.718043 0.088480 0.0 0.077676 0.983288 2.320822 0.173147 1.282059 0.224657 0.0 0.183173 0.151433 0.155860 0.064737 0.104153 0.098728 0.131417 0.130455 0.214806 0.276861 0.208969 0.851380 0.164628 1.177476 0.282265 0.864859 0.307888 0.717975 0.205846 0.272298 0.274365 0.655927 0.0 0.0 0.0
min 1.000000 6.941000 6.423452 5.320573 1.960849 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 375.500000 375.500000 375.500000 375.500000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 48.000000 48.000000 48.000000 48.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.429000 1.429000 1.429000 0.686245 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.500000 1.500000 1.500000 1.500000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.222000 0.222000 0.222000 0.222000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.026580 0.026580 0.026580 0.022952 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000210 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0
25% 3.000000 72.458076 52.143839 58.041225 35.248990 0.966676 0.775363 78.512902 16.824174 32.890369 28.539377 723.740000 738.946339 692.541331 720.108284 1.085871 0.753757 262.400000 291.088889 114.135763 92.994286 149.333333 112.127359 133.542493 89.210097 1.066389 0.852181 80.000000 28.598137 35.112518 32.016958 4513.500000 2999.158291 883.117278 66.746836 0.913959 0.688693 6648.000000 1656.847429 2819.497063 2564.342926 62.090000 73.350000 33.700512 50.772124 0.890589 0.660662 86.700000 34.036000 38.372410 33.440123 7.588667 5.033407 4.109978 1.322127 0.833333 0.672732 12.878000 2.329309 4.261340 4.603491 61.000000 54.180953 8.339818 1.087284 0.457810 0.250677 86.382000 29.349419 37.933172 31.985437 2.333333 2.116732 2.279705 2.091251 1.060857 0.775678 1.000000 0.921454 0.451754 0.306892 5.365000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0
50% 4.000000 84.922750 60.696571 66.361592 39.918385 1.199541 1.146783 122.906070 26.636008 45.123500 44.285984 764.900000 889.966667 727.960610 856.202765 1.356236 0.916843 764.100000 510.440000 266.373871 258.449503 160.250000 125.970297 142.807563 113.181369 1.330735 1.242878 171.000000 43.000000 58.663106 59.932929 5329.085800 4303.421500 1339.974702 1515.364631 1.090610 0.882747 8958.571000 2082.956581 3301.890502 3625.631828 73.100000 102.856863 51.470113 73.173958 1.138284 0.781205 127.050000 71.156250 51.125720 48.029866 9.304400 8.330667 5.253498 4.929787 1.112098 0.994998 12.878000 3.436400 4.948155 5.500675 96.504430 73.333333 14.287643 6.096120 0.738694 0.545783 399.795000 56.556240 135.762089 113.556983 2.833333 2.618182 2.615321 2.434057 1.368922 1.166532 2.000000 1.063077 0.800000 0.500000 20.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.900000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0
75% 5.000000 100.404410 86.103540 78.116681 73.113234 1.444537 1.359418 154.119320 38.356908 59.322812 53.629284 796.320000 1004.117384 765.715174 937.575826 1.551120 1.061750 810.600000 690.703310 297.724924 342.656991 169.857143 158.265231 155.938199 150.988640 1.512348 1.425684 205.000000 60.224491 69.424491 73.777278 6728.000000 6416.333333 5794.965188 5766.015191 1.323930 1.080939 9778.571000 3409.026316 4004.273231 3959.191394 85.504167 110.738462 67.505900 89.975670 1.345894 0.877541 138.630000 76.706965 56.221787 53.320838 17.114444 18.514286 13.600037 16.428652 1.378110 1.157379 23.200000 10.498780 9.041230 8.017581 111.005316 99.062911 42.371302 47.308041 0.962218 0.777353 399.973420 91.869245 153.806272 162.711144 4.000000 4.026201 3.727919 3.914868 1.589027 1.330801 3.000000 1.918400 1.200000 1.020436 63.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 6.800000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.815000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 1.350000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0
max 9.000000 208.980400 208.980400 208.980400 208.980400 1.983797 1.958203 207.972460 205.589910 101.019700 101.019700 1313.100000 1348.028986 1313.100000 1327.593381 2.157777 2.038560 1304.500000 1251.855072 499.671949 479.162305 298.000000 298.000000 298.000000 298.000000 2.141961 1.903748 256.000000 240.164344 115.500000 97.140711 22590.000000 22590.000000 22590.000000 22590.000000 1.954297 1.703420 22588.571000 22434.160000 10724.374500 10410.932005 326.100000 326.100000 326.100000 326.100000 1.767732 1.675400 349.000000 218.696600 162.895331 169.075862 105.000000 105.000000 105.000000 105.000000 2.034410 1.747165 104.778000 102.675000 51.635000 51.680482 332.500000 406.960000 317.883627 376.032878 1.633977 1.612989 429.974170 401.440000 214.986150 213.300452 7.000000 7.000000 7.000000 7.000000 2.141963 1.949739 6.000000 6.992200 3.000000 3.000000 185.000000 14.000000 0.0 3.000000 40.000000 105.000000 120.000000 12.800000 66.000000 4.000000 0.0 4.000000 12.000000 99.925000 100.000000 20.000000 15.000000 3.000000 0.0 3.300000 24.000000 5.000000 75.000000 79.500000 34.900000 14.000000 30.000000 35.380000 45.000000 98.000000 20.000000 41.000000 46.000000 18.000000 19.000000 5.000000 0.0 4.000000 16.700000 9.000000 96.710000 99.976000 99.992000 6.000000 64.000000 45.000000 50.997450 7.000000 99.995000 31.500000 99.200000 83.500000 66.700000 4.000000 0.0 3.000000 24.000000 98.000000 4.998000 185.000000 6.000000 0.0 12.000000 6.000000 4.000000 5.000000 5.000000 5.000000 5.000000 5.000000 16.000000 7.000000 25.000000 55.000000 14.000000 97.240000 10.000000 45.000000 5.800000 64.000000 8.000000 7.000000 19.000000 14.000000 0.0 0.0 0.0
In [ ]:
# Distribution of the target variable: interactive 100-bin histogram of the
# critical temperature column.
hist_options = dict(
    kind='hist',
    bins=100,
    linecolor='black',
    title='Histogram of Critical Temperature',
    xTitle='Critical Temperature',
    yTitle='count',
)
df_merge['critical_temp'].iplot(**hist_options)
In [ ]:
# Pairwise scatter plots for a handful of descriptors together with the target.
scatter_cols = [
    'number_of_elements',
    'mean_atomic_mass',
    'mean_atomic_radius',
    'critical_temp',
]
df_scatter = df_merge.loc[:, scatter_cols]
df_scatter.scatter_matrix()

Begin Modeling¶

In [ ]:
# Standardise every column except the target, overwriting the values in df_merge.
# NOTE(review): the scaler is fit on all rows before the cross-validation below,
# so each held-out fold has contributed to the scaling statistics. Fitting the
# scaler inside a Pipeline per fold would avoid that leakage.
is_predictor = df_merge.columns != 'critical_temp'
scale_cols = df_merge.columns[is_predictor]

sc = StandardScaler()
scaled_values = sc.fit_transform(df_merge[scale_cols])
df_merge[scale_cols] = scaled_values
In [ ]:
# 10-fold cross-validation splitter. This is a plain (non-stratified) KFold;
# shuffling with a fixed seed keeps the fold assignment reproducible.
kfcv = KFold(n_splits=10, shuffle=True, random_state=0)
In [ ]:
# Split the frame into the regression target (y) and the predictor matrix (X).
target_col = ['critical_temp']
is_feature = ~df_merge.columns.isin(target_col)
feature_cols = df_merge.columns[is_feature]
X = df_merge[feature_cols]
y = df_merge['critical_temp']
In [ ]:
%%time
# Grid search for Linear Regression task 1

lr_grid={"l1_ratio":np.arange(0.0,1.0,0.1), 
      "tol": [1e-9,1e-8,1e-7,1e-6,1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
      "eps":[1e-3, 1e-2, 1e-1,1,10,100]
      }

model=ElasticNetCV(random_state = 0,max_iter=10000)

model_gs=GridSearchCV(model,
                       lr_grid,
                       cv = kfcv,
                       n_jobs=-1,
                       scoring = "neg_root_mean_squared_error")

model_gs.fit(X,y)
best_params = model_gs.best_params_
print(f'Grid Search Best Parameters{best_params}')
Grid Search Best Parameters{'eps': 0.001, 'l1_ratio': 0.5, 'tol': 0.1}
CPU times: user 11 s, sys: 5.65 s, total: 16.7 s
Wall time: 9min 58s
In [ ]:
# Re-evaluate the tuned model with 10-fold cross-validation.
# The hyper-parameters below are the best combination reported by the grid
# search (l1_ratio=0.5, tol=0.1, eps=0.001).
model_ = ElasticNetCV(l1_ratio=0.5,
                      tol=0.1,
                      eps=0.001,
                      random_state=0,
                      max_iter=10000)

# return_estimator=True keeps each fold's fitted model so its coefficients can
# be inspected afterwards; scores are negative RMSE (closer to 0 is better).
model_score = cross_validate(model_, X, y,
                             scoring='neg_root_mean_squared_error',
                             cv=kfcv,
                             return_estimator=True,
                             n_jobs=-1,
                             return_train_score=True)

model_results = pd.DataFrame(model_score)
# The 'estimator' column holds fitted model objects, which cannot be averaged.
# numeric_only=True restricts the mean to the timing/score columns (a plain
# .mean() raises TypeError on recent pandas); the 'estimator' cell of the
# appended 'mean' row is left as NaN.
model_results.loc['mean'] = model_results.mean(numeric_only=True)
print(model_results.to_markdown())
|      |   fit_time |   score_time | estimator                                             |   test_score |   train_score |
|:-----|-----------:|-------------:|:------------------------------------------------------|-------------:|--------------:|
| 0    |   0.790638 |   0.00162292 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -17.8198 |      -17.6992 |
| 1    |   0.790282 |   0.00154591 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -19.5118 |      -17.4999 |
| 2    |   0.767968 |   0.00222802 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -17.8285 |      -17.6565 |
| 3    |   0.786807 |   0.00218701 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -17.6732 |      -17.6302 |
| 4    |   0.754597 |   0.00209999 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -17.8176 |      -17.703  |
| 5    |   0.784165 |   0.0020647  | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -17.1766 |      -17.7352 |
| 6    |   0.762796 |   0.0016861  | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -17.8488 |      -17.7298 |
| 7    |   0.778274 |   0.0014472  | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -18.0257 |      -17.6453 |
| 8    |   0.762333 |   0.00171614 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -17.7077 |      -17.629  |
| 9    |   0.74049  |   0.00208688 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) |     -17.8841 |      -17.6424 |
| mean |   0.771835 |   0.00186849 | nan                                                   |     -17.9294 |      -17.6571 |
In [ ]:
# Obtain the average absolute coefficient for each feature across the CV folds.
# One (Coefficient, Feature) frame is built per fitted fold estimator and all
# frames are concatenated in a single call, rather than growing a DataFrame
# inside the loop. Iterating over the estimators themselves avoids hard-coding
# the number of folds.
fold_frames = [
    pd.DataFrame({'Coefficient': np.abs(fold_model.coef_),
                  'Feature': X.columns})
    for fold_model in model_score['estimator']
]
df = pd.concat(fold_frames, axis=0)

# Mean |coefficient| per feature, largest first.
avg_feat_coef = (
    df.groupby('Feature', as_index=False)['Coefficient']
      .mean()
      .sort_values(by='Coefficient', ascending=False)
)
In [ ]:
# Horizontal bar chart of the ten features with the largest average absolute
# coefficient (avg_feat_coef is already sorted in descending order).
top_10_features = avg_feat_coef.head(10)

plt.style.use('ggplot')
ax = plt.gca()
ax.barh(top_10_features['Feature'], top_10_features['Coefficient'])
ax.set_title('Top 10 Features in Elastic Net CV Model')
ax.set_xlabel('Coefficient Value')
ax.set_ylabel('Feature')
# Tilt the feature names so long labels stay readable.
plt.setp(ax.get_yticklabels(), rotation=30, va='top')
plt.show()